#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          Validate that the MaxSpareNodeCount is enforced
############################################################################
# Copyright (C) 2011-2014 SchedMD LLC
# Written by Nathan Yee <nyee32@schedmd.com>
# All rights reserved
############################################################################
source ./globals

# Working state shared by the procedures and the main flow below.
set file_in      "${test_dir}/script"  ;# batch script handed to sbatch
set job_id       0                     ;# stays 0 until sbatch reports a job id
set node_name    {}                    ;# node currently targeted by smd
set new_node     {}                    ;# replacement node reported by smd
set fail_node    {}                    ;# node sinfo reports in the fail state
set spare_node   0                     ;# MaxSpareNodeCount value read from smd

# Teardown helper: cancels the job recorded in the global job_id.
# NOTE(review): presumably invoked by the test framework sourced from
# ./globals -- confirm against that file.
proc cleanup {} {
	cancel_job $::job_id
}

# Precondition 1: the nonstop plugin must be listed in SlurmctldPlugstack.
set ctld_plugstack [get_config_param "SlurmctldPlugstack"]
if {![param_contains $ctld_plugstack "nonstop"]} {
	skip "This test is only compatible when SlurmctldPlugstack includes nonstop"
}

# Precondition 2: the test updates node state, which needs a super user.
if {![is_super_user]} {
	skip "This test is only suitable for a super user (to restore down nodes)"
}

# Batch script submitted by the test: it just sleeps.
make_bash_script $file_in "$bin_sleep 100"

# Run "smd -c" and extract the MaxSpareNodeCount value from its output.
#
# Returns the number captured from the last "MaxSpareNodeCount: <n>" line
# seen, or 0 when no such line is printed. Calls fail if smd times out.
proc get_max_spare_node_cnt {} {
	global smd number

	set max_spares 0
	spawn $smd -c
	expect {
		timeout {
			fail "smd is not responding"
		}
		eof {
			wait
		}
		-re "MaxSpareNodeCount: ($number)" {
			set max_spares $expect_out(1,string)
			exp_continue
		}
	}
	return $max_spares
}

# Look up a node allocated to a job.
#
# job_id - id of the job to query
#
# Reads the job's node list from squeue, expands it with
# "scontrol show hostnames", and returns the last hostname printed (each
# match overwrites the previous one). Calls fail when squeue or scontrol
# time out, or when squeue reports no node list for the job.
proc get_node {job_id} {
	global scontrol squeue re_word_str

	set node_list ""
	spawn $squeue -j $job_id --noheader -o NodeList=%N
	expect {
		-re "NodeList=($re_word_str)" {
			set node_list $expect_out(1,string)
			exp_continue
		}
		timeout {
			fail "squeue is not responding"
		}
		eof {
			wait
		}
	}

	# Without a node list there is nothing to expand; report that directly
	# instead of handing scontrol an empty argument.
	if {$node_list eq ""} {
		fail "squeue did not report a node list for job $job_id"
	}

	set node_name ""
	spawn $scontrol show hostnames $node_list
	expect {
		-re "($re_word_str)" {
			set node_name $expect_out(1,string)
			exp_continue
		}
		timeout {
			fail "scontrol is not responding"
		}
		eof {
			wait
		}
	}
	return $node_name
}

# Ask scontrol to set State=RESUME on the given node.
#
# node - value passed as NodeName= to "scontrol update"
#
# Calls fail if scontrol times out; otherwise waits for it to exit.
proc reset_state {node} {
	spawn $::scontrol update NodeName=$node State=RESUME
	expect {
		eof {
			wait
		}
		timeout {
			fail "scontrol is not responding"
		}
	}
}

#
# Submit the batch script on two nodes and capture the job id
#
set job_id 0
spawn $sbatch -N2 --no-kill --output=/dev/null $file_in
expect {
	timeout {
		fail "sbatch is not responding"
	}
	eof {
		wait
	}
	-re "Submitted batch job ($number)" {
		set job_id $expect_out(1,string)
		exp_continue
	}
}
if {$job_id == 0} {
	fail "sbatch did not submit job"
}

#
# Wait until the job is running, then pick the node to act on
#
wait_for_job -fail $job_id "RUNNING"
set node_name [get_node $job_id]

# The replacement loop below makes one more attempt than this value
set spare_node [get_max_spare_node_cnt]
#
# Drain and replace the job's node up to MaxSpareNodeCount + 1 times.
# Each pass drains the current node, asks smd for a replacement, resumes
# the drained node and continues with the replacement. The loop stops early
# once smd reports that the MaxSpareNodeCount limit has been reached.
#
# NOTE(review): if every pass gets a replacement, the loop ends without the
# limit message ever being seen and nothing flags that -- confirm whether
# that should be a failure.
#
for {set count 0} {$count <= $spare_node} {incr count} {
	# Step 1: drain the node currently in use by the job
	set drain_match 0
	spawn $smd -d $node_name $job_id -R test
	expect {
		timeout {
			fail "smd is not responding"
		}
		eof {
			wait
		}
		-re "Job $job_id node $node_name is being drained" {
			set drain_match 1
			exp_continue
		}
	}
	if {!$drain_match} {
		log_error "smd did not drain node"
	}

	# Step 2: request a replacement node
	#   replace_match 0 = no recognised reply
	#   replace_match 1 = node replaced
	#   replace_match 2 = MaxSpareNodeCount limit reported
	set replace_match 0
	spawn $smd -r $node_name $job_id
	expect {
		timeout {
			fail "smd is not responding"
		}
		eof {
			wait
		}
		-re "Job $job_id got node $node_name replaced with node ($re_word_str)" {
			set new_node $expect_out(1,string)
			set replace_match 1
			exp_continue
		}
		-re "Job has reached MaxSpareNodeCount limit" {
			log_debug "Do not worry this error is expected"
			set replace_match 2
			exp_continue
		}
	}
	if {!$replace_match} {
		fail "smd did not replace node or smd did not reach MaxSpareNodeLimit when it should have"
	}

	# Step 3: resume the drained node and carry on with the replacement
	reset_state $node_name
	set node_name $new_node
	if {$replace_match == 2} {
		break
	}
}

#
# Look for a node that sinfo still reports in the fail state and resume
# it, so the test does not leave a node out of service. When several lines
# match, the last one wins because each match overwrites fail_node.
#
spawn $sinfo --noheader -o "State=%T NodeName=%N"
expect {
	-re "State=fail NodeName=($re_word_str)" {
		set fail_node $expect_out(1,string)
		exp_continue
	}
	timeout {
		fail "sinfo is not responding"
	}
	eof {
		wait
	}
}

# Previously fail_node was captured but never acted upon.
if {$fail_node ne ""} {
	reset_state $fail_node
}


